\[\\[.4in]\]

Retrieving Forecast Data

Every week, forecasters submit their hospitalization predictions to COVID-19 ForecastHub. In this report, we rely on an AWS bucket that contains the estimates of a handful of signals (e.g., COVID death, cases, hospitalization, etc). Furthermore, the AWS server stores an array of evaluation metrics of these forecasts (e.g., Absolute Error, Weighted Interval Score, and 80% Coverage). Alternatively, the data can be retrieved from the publicly accessible covidcast and covideval APIs.

s3a <- get_bucket("forecast-eval", region = "us-east-2")
s3b <- get_bucket("forecasting-team-data")
scores1 <- s3readRDS("score_cards_state_hospitalizations.rds", s3a, 
                     region = "us-east-2")
scores1 <- subset(
  scores1, 
  forecaster %in% union(params$forecasters, "COVIDhub-baseline"))

# The Hub always makes forecasts on Monday (dofw 2)
# We move all forecast_dates to the following Monday and shorten the ahead
wday_shift <- function(x, base_dofw = 2) (base_dofw - wday(x)) %% 7
scores1 <- scores1 %>%
  mutate(ahead = ahead - wday_shift(forecast_date),
         forecast_date = forecast_date + wday_shift(forecast_date))

# need to pass in the right forecaster here
scores <- s3readRDS(params$aws_scores, s3b) %>%
  magrittr::extract2("reformatted.buggy.matched.scorecards") %>%
  select(ahead, geo_value, forecaster, forecast_date, target_end_date,
         actual, wis, ae, cov_80, value_50, data_source, signal, 
         incidence_period, direction_on_forecast_date)
direction <- scores %>% 
  select(forecast_date, direction_on_forecast_date) %>%
  distinct()
  # Logan's names are brutally long...
our_pred_dates <- unique(scores$forecast_date)
forecast_dates <- our_pred_dates
aheads <- unique(scores$ahead)
geo_values <- unique(scores$geo_value)
veggies <- c("asparagus", "broccoli", "chard", "daikon", 
             "escarole", "fennel", "garlic", "horseradish",
             "jicama", "kohlrabi", "lettuce", "mushroom",
             "nori", "okra", "parsnip", "radish", "squash",
             "tomato", "watercress", "baseline1", "yuca")
names_table <- tibble(forecaster = unique(scores$forecaster), veggies = veggies)
names_table <- names_table[names_table$veggies %in% c("fennel", "jicama", "mushroom", "tomato", "watercress", "yuca"), ]
scores <- left_join(scores, names_table) %>% 
  filter(veggies %in% c("fennel", "jicama", "mushroom", "tomato", "watercress", "yuca")) %>% # small subset
  select(-forecaster, -direction_on_forecast_date) %>%
  rename(forecaster = veggies)

results <- scores1 %>% 
  select(ahead, geo_value, forecaster, forecast_date, target_end_date,
         actual, wis, ae, cov_80, value_50, data_source, signal, 
         incidence_period) %>%
  bind_rows(scores) %>%
  filter(forecast_date %in% forecast_dates,
         ahead %in% aheads,
         geo_value %in% geo_values) %>%
  left_join(direction) %>%
  filter(!is.na(direction_on_forecast_date))

The target forecast dates are:
2020-11-16, 2020-11-23, 2020-11-30, 2020-12-07, 2020-12-14, 2020-12-21, 2020-12-28, 2021-01-04, 2021-01-11, 2021-01-18, 2021-01-25, 2021-02-01, 2021-02-15, 2021-02-22, 2021-03-01, 2021-03-08, 2021-03-15, 2021-03-22, 2021-03-29, 2021-04-05, 2021-04-12, 2021-04-19, 2021-05-03, 2021-05-10, 2021-05-17, 2021-05-24, 2021-05-31, 2021-06-07, 2021-06-14, 2021-06-21, 2021-06-28, 2021-07-05, 2021-07-12, 2021-07-19, 2021-07-26, 2021-08-02, 2021-08-09, 2021-08-16, 2021-08-23, 2021-08-30, 2021-09-06, 2021-09-13, 2021-09-20, 2021-09-27

The template will compile data of the following forecasters:
COVIDhub-ensemble.

For this analysis, all of Logan’s forecasters have been renamed:

kableExtra::kbl(names_table) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"))
forecaster veggies
______________________none oareigo zareino neimacpas fennel
______________________none zacougo dacouno neiwinnah jicama
__________________never-up oareigo zareino neimacpas mushroom
__nqcomb_logistic1_u/s/d/l oareigo zareino neimacpas tomato
_marginal_cheating_u/s/d/l oareigo zareino neimacpas watercress
marginal_logistic1_u/s/d/l oareigo zareino neimacpas yuca

\[\\[.07in]\]

Weighted Interval Score (relative to baseline) and 80% Coverage

Mean <- function(x) mean(x, na.rm = TRUE)
GeoMean <- function(x, offset = 0) exp(Mean(log(x + offset)))

facets.label = str_glue("{aheads} days ahead")
names(facets.label) = aheads

subtitle = sprintf("Forecasts made over %s to %s",
                   format(min(forecast_dates), "%B %d, %Y"),
                   format(max(forecast_dates), "%B %d, %Y"))

WIS by Ahead (GeoMean)

plot_wis_a <-
  plot_canonical(results, 
                 x = "ahead", 
                 y = "wis", 
                 aggr = GeoMean,
                 grp_vars = c("forecaster", "direction_on_forecast_date"),
                 facet_rows = "direction_on_forecast_date",
                 dots = TRUE,
                 base_forecaster = "COVIDhub-baseline") + 
  labs(title = subtitle, 
       x = "Days ahead", 
       y = "Geometric Mean WIS") +
  theme_bw() +
  theme(plot.title = element_text(hjust = "center"),
        legend.position = "bottom",
        legend.title = element_blank()) + 
  geom_hline(yintercept = 1, size = 1.5) +
  scale_y_log10() +
  scale_color_viridis_d() +
  guides(color = guide_legend(ncol = 2))


ggplotly(plot_wis_a, tooltip="text", height=800, width= 1000) %>% 
  layout(hoverlabel = list(bgcolor = "white"))

WIS by Ahead (Mean)

plot_wis_a <-
  plot_canonical(results, 
                 x = "ahead", 
                 y = "wis", 
                 aggr = Mean,
                 grp_vars = c("forecaster", "direction_on_forecast_date"), 
                 facet_rows = "direction_on_forecast_date",
                 dots = TRUE,
                 base_forecaster = "COVIDhub-baseline") + 
  labs(title = subtitle, 
       x = "Days ahead", 
       y = "Geometric Mean WIS") +
  theme_bw() +
  theme(plot.title = element_text(hjust = "center"),
        legend.position = "bottom",
        legend.title = element_blank()) + 
  geom_hline(yintercept = 1, size = 1.5) +
  scale_y_log10() +
  scale_color_viridis_d() +
  guides(color = guide_legend(ncol = 2))


ggplotly(plot_wis_a, tooltip="text", height=800, width= 1000) %>% 
  layout(hoverlabel = list(bgcolor = "white"))

% Coverage by Ahead

plot_cov80_a <-
  plot_canonical(results, 
                 x = "ahead", 
                 y = "cov_80", 
                 aggr = mean,
                 grp_vars = c("forecaster", "direction_on_forecast_date"),
                 facet_rows = "direction_on_forecast_date",
                 dots = TRUE) +
  labs(title = subtitle, x= "Days ahead", y = "Mean Coverage 80") +
  theme_bw() +
  theme(plot.title = element_text(hjust = "center"),
        legend.position = "bottom",
        legend.title = element_blank()) + 
  scale_color_viridis_d() +
  geom_hline(yintercept = 0.8, size = 1.5) +
  guides(color = guide_legend(ncol = 2))

ggplotly(plot_cov80_a, tooltip="text", height=800, width=1000) 

Trajectories

actuals <- results %>%
  select(forecast_date, geo_value, actual) %>%
  distinct()

traj_plot <- function(location) {
  results %>%
    filter(geo_value == location) %>%
    ggplot() +
    geom_line(data = actuals %>% filter(geo_value == location), 
              aes(forecast_date, actual)) +
    geom_line(aes(target_end_date, value_50, group = forecast_date, color = forecaster)) +
    facet_wrap(~forecaster, ncol = 2) +
    theme_bw() +
    theme(legend.position = "bottom") +
    scale_color_brewer(palette = "Set1")  
}

New York

traj_plot("ny")

Texas

traj_plot("tx")

California

traj_plot("ca")

Florida

traj_plot("fl")

Georgia

traj_plot("ga")

Tenn

traj_plot("tn")

Miss

traj_plot("ms")

Utah

traj_plot("ut")

New Hampshire

traj_plot("nh")